fields.py 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273
  1. from __future__ import absolute_import
  2. import email.utils
  3. import mimetypes
  4. import re
  5. from .packages import six
  6. def guess_content_type(filename, default="application/octet-stream"):
  7. """
  8. Guess the "Content-Type" of a file.
  9. :param filename:
  10. The filename to guess the "Content-Type" of using :mod:`mimetypes`.
  11. :param default:
  12. If no "Content-Type" can be guessed, default to `default`.
  13. """
  14. if filename:
  15. return mimetypes.guess_type(filename)[0] or default
  16. return default
  17. def format_header_param_rfc2231(name, value):
  18. """
  19. Helper function to format and quote a single header parameter using the
  20. strategy defined in RFC 2231.
  21. Particularly useful for header parameters which might contain
  22. non-ASCII values, like file names. This follows RFC 2388 Section 4.4.
  23. :param name:
  24. The name of the parameter, a string expected to be ASCII only.
  25. :param value:
  26. The value of the parameter, provided as ``bytes`` or `str``.
  27. :ret:
  28. An RFC-2231-formatted unicode string.
  29. """
  30. if isinstance(value, six.binary_type):
  31. value = value.decode("utf-8")
  32. if not any(ch in value for ch in '"\\\r\n'):
  33. result = u'%s="%s"' % (name, value)
  34. try:
  35. result.encode("ascii")
  36. except (UnicodeEncodeError, UnicodeDecodeError):
  37. pass
  38. else:
  39. return result
  40. if six.PY2: # Python 2:
  41. value = value.encode("utf-8")
  42. # encode_rfc2231 accepts an encoded string and returns an ascii-encoded
  43. # string in Python 2 but accepts and returns unicode strings in Python 3
  44. value = email.utils.encode_rfc2231(value, "utf-8")
  45. value = "%s*=%s" % (name, value)
  46. if six.PY2: # Python 2:
  47. value = value.decode("utf-8")
  48. return value
  49. _HTML5_REPLACEMENTS = {
  50. u"\u0022": u"%22",
  51. # Replace "\" with "\\".
  52. u"\u005C": u"\u005C\u005C",
  53. u"\u005C": u"\u005C\u005C",
  54. }
  55. # All control characters from 0x00 to 0x1F *except* 0x1B.
  56. _HTML5_REPLACEMENTS.update(
  57. {
  58. six.unichr(cc): u"%{:02X}".format(cc)
  59. for cc in range(0x00, 0x1F + 1)
  60. if cc not in (0x1B,)
  61. }
  62. )
  63. def _replace_multiple(value, needles_and_replacements):
  64. def replacer(match):
  65. return needles_and_replacements[match.group(0)]
  66. pattern = re.compile(
  67. r"|".join([re.escape(needle) for needle in needles_and_replacements.keys()])
  68. )
  69. result = pattern.sub(replacer, value)
  70. return result
  71. def format_header_param_html5(name, value):
  72. """
  73. Helper function to format and quote a single header parameter using the
  74. HTML5 strategy.
  75. Particularly useful for header parameters which might contain
  76. non-ASCII values, like file names. This follows the `HTML5 Working Draft
  77. Section 4.10.22.7`_ and matches the behavior of curl and modern browsers.
  78. .. _HTML5 Working Draft Section 4.10.22.7:
  79. https://w3c.github.io/html/sec-forms.html#multipart-form-data
  80. :param name:
  81. The name of the parameter, a string expected to be ASCII only.
  82. :param value:
  83. The value of the parameter, provided as ``bytes`` or `str``.
  84. :ret:
  85. A unicode string, stripped of troublesome characters.
  86. """
  87. if isinstance(value, six.binary_type):
  88. value = value.decode("utf-8")
  89. value = _replace_multiple(value, _HTML5_REPLACEMENTS)
  90. return u'%s="%s"' % (name, value)
  91. # For backwards-compatibility.
  92. format_header_param = format_header_param_html5
  93. class RequestField(object):
  94. """
  95. A data container for request body parameters.
  96. :param name:
  97. The name of this request field. Must be unicode.
  98. :param data:
  99. The data/value body.
  100. :param filename:
  101. An optional filename of the request field. Must be unicode.
  102. :param headers:
  103. An optional dict-like object of headers to initially use for the field.
  104. :param header_formatter:
  105. An optional callable that is used to encode and format the headers. By
  106. default, this is :func:`format_header_param_html5`.
  107. """
  108. def __init__(
  109. self,
  110. name,
  111. data,
  112. filename=None,
  113. headers=None,
  114. header_formatter=format_header_param_html5,
  115. ):
  116. self._name = name
  117. self._filename = filename
  118. self.data = data
  119. self.headers = {}
  120. if headers:
  121. self.headers = dict(headers)
  122. self.header_formatter = header_formatter
  123. @classmethod
  124. def from_tuples(cls, fieldname, value, header_formatter=format_header_param_html5):
  125. """
  126. A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters.
  127. Supports constructing :class:`~urllib3.fields.RequestField` from
  128. parameter of key/value strings AND key/filetuple. A filetuple is a
  129. (filename, data, MIME type) tuple where the MIME type is optional.
  130. For example::
  131. 'foo': 'bar',
  132. 'fakefile': ('foofile.txt', 'contents of foofile'),
  133. 'realfile': ('barfile.txt', open('realfile').read()),
  134. 'typedfile': ('bazfile.bin', open('bazfile').read(), 'image/jpeg'),
  135. 'nonamefile': 'contents of nonamefile field',
  136. Field names and filenames must be unicode.
  137. """
  138. if isinstance(value, tuple):
  139. if len(value) == 3:
  140. filename, data, content_type = value
  141. else:
  142. filename, data = value
  143. content_type = guess_content_type(filename)
  144. else:
  145. filename = None
  146. content_type = None
  147. data = value
  148. request_param = cls(
  149. fieldname, data, filename=filename, header_formatter=header_formatter
  150. )
  151. request_param.make_multipart(content_type=content_type)
  152. return request_param
  153. def _render_part(self, name, value):
  154. """
  155. Overridable helper function to format a single header parameter. By
  156. default, this calls ``self.header_formatter``.
  157. :param name:
  158. The name of the parameter, a string expected to be ASCII only.
  159. :param value:
  160. The value of the parameter, provided as a unicode string.
  161. """
  162. return self.header_formatter(name, value)
  163. def _render_parts(self, header_parts):
  164. """
  165. Helper function to format and quote a single header.
  166. Useful for single headers that are composed of multiple items. E.g.,
  167. 'Content-Disposition' fields.
  168. :param header_parts:
  169. A sequence of (k, v) tuples or a :class:`dict` of (k, v) to format
  170. as `k1="v1"; k2="v2"; ...`.
  171. """
  172. parts = []
  173. iterable = header_parts
  174. if isinstance(header_parts, dict):
  175. iterable = header_parts.items()
  176. for name, value in iterable:
  177. if value is not None:
  178. parts.append(self._render_part(name, value))
  179. return u"; ".join(parts)
  180. def render_headers(self):
  181. """
  182. Renders the headers for this request field.
  183. """
  184. lines = []
  185. sort_keys = ["Content-Disposition", "Content-Type", "Content-Location"]
  186. for sort_key in sort_keys:
  187. if self.headers.get(sort_key, False):
  188. lines.append(u"%s: %s" % (sort_key, self.headers[sort_key]))
  189. for header_name, header_value in self.headers.items():
  190. if header_name not in sort_keys:
  191. if header_value:
  192. lines.append(u"%s: %s" % (header_name, header_value))
  193. lines.append(u"\r\n")
  194. return u"\r\n".join(lines)
  195. def make_multipart(
  196. self, content_disposition=None, content_type=None, content_location=None
  197. ):
  198. """
  199. Makes this request field into a multipart request field.
  200. This method overrides "Content-Disposition", "Content-Type" and
  201. "Content-Location" headers to the request parameter.
  202. :param content_type:
  203. The 'Content-Type' of the request body.
  204. :param content_location:
  205. The 'Content-Location' of the request body.
  206. """
  207. self.headers["Content-Disposition"] = content_disposition or u"form-data"
  208. self.headers["Content-Disposition"] += u"; ".join(
  209. [
  210. u"",
  211. self._render_parts(
  212. ((u"name", self._name), (u"filename", self._filename))
  213. ),
  214. ]
  215. )
  216. self.headers["Content-Type"] = content_type
  217. self.headers["Content-Location"] = content_location